# Title: Nyando Agroforestry Practices ###
# Author details: Levi Orero, Contact: l.orero@cgiar.org###
# Script and data info: This script performs analyses on tree data.##
# Data consists of counts of trees,species and agroforestry practices. 
# Data was collected in the Nyando river basin between May-Sept 2016. 
# Data retrieved from R working directory

# Copyright statement: This script is the product of ICRAF Systems Dept

# Table 3 : Summary of household socioeconomic characteristics#####
# ANALYSIS FOR HOUSEHOLD CHARACTERISTICS 
# load packages
library(readr)
library(qwraps2)
library(summarytools)
library(tidyverse)
library(ggplot2)
library(ggpubr)

# read data
nyando.socio <- read_csv("nyando_socioeconomic.csv")

# Overall Summary Table 3
nyando.socio %>%
    summarise(
        # household size
        size = mean(hh_size),
        # land owned in ha
        owned = mean(land_owned_ha),
        # land operated in ha
        operated = mean(land.operated_ha),
        # mean age
        age = mean(hh_head_age),
        # household head is male
        male_perc = n_perc(hh_head_gender == "M"))

# Summaries by region
nyando.socio %>% group_by(block) %>% 
    summarise(
        # household size
        size = mean(hh_size),
        # land owned in ha
        owned = mean(land_owned_ha),
        # land operated in ha
        operated = mean(land.operated_ha),
        # mean age
        age = mean(hh_head_age),
        # household head is male
        male_perc = n_perc(hh_head_gender == "M"))

# Summaries by group type
nyando.socio %>% group_by(group_type) %>% 
    summarise(
        # household size
        size = mean(hh_size),
        # land owned in ha
        owned = mean(land_owned_ha),
        # land operated in ha
        operated = mean(land.operated_ha),
        # mean age
        age = mean(hh_head_age),
        # household head is male
        male_perc = n_perc(hh_head_gender == "M"))


## Calculating One way ANOVA Welch's
## anova for unequal variances By
## region
oneway.test(land_owned_ha ~ block, data = nyando.socio, 
    var.equal = FALSE)  #land owned

oneway.test(land.operated_ha ~ block, 
    data = nyando.socio, var.equal = FALSE)  #land operated

oneway.test(hh_head_age ~ block, data = nyando.socio, 
    var.equal = FALSE)  #age

oneway.test(hh_size ~ block, data = nyando.socio, 
    var.equal = FALSE)  #size

oneway.test((hh_head_gender == "M") ~ 
    block, data = nyando.socio, var.equal = FALSE)  #gender

## Lower Region By group type
oneway.test(land_owned_ha ~ group_type,
            data = nyando.socio %>%
                filter(block == "Lower"),
            var.equal = FALSE)  #land owned

oneway.test(land.operated_ha ~ group_type,
            data = nyando.socio %>%
                filter(block == "Lower"),
            var.equal = FALSE)  #land operated

oneway.test(hh_head_age ~ group_type,
            data = nyando.socio %>%
                filter(block == "Lower"),
            var.equal = FALSE)  #age

oneway.test(hh_size ~ group_type,
            data = nyando.socio %>%
                filter(block == "Lower"),
            var.equal = FALSE)  #size

oneway.test((hh_head_gender == "M") ~ 
    group_type, data = nyando.socio %>% 
    filter(block == "Lower"), var.equal = FALSE)  #gender


## Middle Region By group type
oneway.test(land_owned_ha ~ group_type,
            data = nyando.socio %>%
                filter(block == "Middle"),
            var.equal = FALSE)  #land owned

oneway.test(land.operated_ha ~ group_type,
            data = nyando.socio %>%
                filter(block == "Middle"),
            var.equal = FALSE)  #land operated

oneway.test(hh_head_age ~ group_type,
            data = nyando.socio %>%
                filter(block == "Middle"),
            var.equal = FALSE)  #age

oneway.test(hh_size ~ group_type,
            data = nyando.socio %>%
                filter(block == "Middle"),
            var.equal = FALSE)  #size

oneway.test((hh_head_gender == "M") ~ 
    group_type, data = nyando.socio %>% 
    filter(block == "Middle"), var.equal = FALSE)  #gender

# Table 4 : Comparison of number and
# size of trees and shrubs on
# farm
##### ANALYSIS FOR TREE POPULATION AND PROPORTIONS
# Table # 4 load data
nyando_agro <- read_csv("nyando_agroforestry.csv", 
    guess_max = 10000)

# Clean column names
library(janitor)
nyando_agro <- clean_names(nyando_agro)

## Create column for DBH classes
nyando.class = nyando_agro %>%
    mutate(dbh.class = ifelse((dbh < 10), "<10cm",
                              ifelse((dbh >= 10 & dbh <= 20),
                                     "10-20cm",
                                     ifelse((dbh > 20 & dbh <= 30),
                                            "20-30cm",
                                            ifelse((dbh > 30 & dbh <= 40),
                                                   "30-40cm", ">40cm"))))) %>% 
    mutate(dbh.class = factor(dbh.class, levels = c("<10cm",
                                 "10-20cm",
                                 "20-30cm",
                                 "30-40cm",
                                 ">40cm")))

## Counts and proportions Lower Project
nyando.class %>%
    filter(type_r == "Lower Project") %>% 
    group_by(dbh.class) %>%
    summarise(count = n()) %>% 
    mutate(proportion = (count * 100)/sum(count))

## Lower Control
nyando.class %>%
    filter(type_r == "Lower Control") %>% 
    group_by(dbh.class) %>%
    summarise(count = n()) %>% 
    mutate(proportion = (count * 100)/sum(count))

## Middle Project
nyando.class %>%
    filter(type_r == "Middle Project") %>% 
    group_by(dbh.class) %>%
    summarise(count = n()) %>% 
    mutate(proportion = (count * 100)/sum(count))

## Middle Control
nyando.class %>%
    filter(type_r == "Middle Control") %>% 
    group_by(dbh.class) %>%
    summarise(count = n()) %>% 
    mutate(proportion = (count * 100)/sum(count))



# TABLE 5: Vegetation structure measurements (tree density per ha)
# ANALYSIS FOR TREE DENSITY AND AGROFORESTRY#
# Agroforestry Paper Analysis 
# Recall working datasets
head(nyando_agro)
# set features as factors
nyando_agro$block = as.factor(nyando_agro$block)
nyando_agro$type_r = as.factor(nyando_agro$type_r)

# Create subset for Total Lower Project (TLP)
nyando_lp <- subset(nyando_agro,
                   type_r == "Lower Project")

# Create subset for Total Lower Control (TLC)
nyando_lc <- subset(nyando_agro, type_r == "Lower Control")


# Create subset for Total Middle Project (TMP)
nyando_mp <- subset(nyando_agro,
                    type_r == "Middle Project")

# Create subset for Total Middle Control(TMC)
nyando_mc <- subset(nyando_agro,
                    type_r == "Middle Control")

# Create a population DB for all data

# create copy of file
nyando_count <- nyando_agro
# add new column called count
nyando_count$count <- 1
# select specific columns
nyando_count_1 <- nyando_count %>% 
    select(interviewee_name, block, type_r, area_hectare, count) 

# aggregate these    
nyando_count_2 <- aggregate(. ~ interviewee_name + block + type_r,
                            nyando_count_1, sum)


# Using 5 DBH Classes of five DBH
# classes of trees specify [<10,
# 10-20, 20-30, 30-40, and >40 cm]

## Tree density of less than 10 cm
## (<10cm)###########################
## <10cm
nyando_agro_d1 <- filter(nyando_agro, dbh < 10)
nyando_agro_d1$count <- 1
nyando_d11 <- nyando_agro_d1[, c("interviewee_name", 
    "block", "type_r", "area_hectare", 
    "count")]
aggdata1 <- aggregate(. ~ interviewee_name + 
    block + type_r, nyando_d11, sum)

aggdata1$land <- nyando.socio$land_owned_ha[match(
    aggdata1$interviewee_name, 
    nyando.socio$name)]
# match interviewee names from socio
# economic data

aggdata1$tree_d <- aggdata1$count/aggdata1$land

## Omit infinite values in the
## analyses
aggdata1.b <- aggdata1 %>% filter(tree_d != 
    Inf)

## Lower vs Middle ANOVA
aov.1 <- aov(tree_d ~ block, data = aggdata1.b)
summary(aov.1)

oneway.test(tree_d ~ block, data = aggdata1.b)  #ANOVA Unequal var


## Control vs Project ANOVA (Relaxing
## the homogeneity of variance
## assumption)
oneway.test(tree_d ~ type_r, data = subset(aggdata1.b, 
    block == "Middle"))
oneway.test(tree_d ~ type_r, data = subset(aggdata1.b, 
    block == "Lower"))

# CALCULATE TREE DENSITIES###################################

## Tree density by region#################
aggdata1.b %>% group_by(block) %>% summarise(mean(tree_d))

## Tree density by treatment group (Project vs Control)####
specify_decimal <- function(x, k) trimws(format(round(x, 
    k), nsmall = k))  #decimal places
    # Function above computes the wanted
    # value to two decimal places
    # Returns: The value of x rounded to
    # 2 decimal places

aggdata1.b %>%
    group_by(type_r) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 
        2)) %>%
    add_row(type_r = "Overall", 
    tree_density = paste0(round(mean(aggdata1.b$tree_d),2), 
        "(", round(sd(aggdata1.b$tree_d),2), 
        ")"))

## Tree density of between 10 cm and 20cm (10cm-20cm)#################
## 10-20cm
nyando_agro_d2 <- filter(nyando_agro, (dbh >= 
    10 & dbh <= 20))
nyando_agro_d2$count <- 1
nyando_d12 <- nyando_agro_d2[, c("interviewee_name", 
    "block", "type_r", "area_hectare", 
    "count")]
aggdata2 <- aggregate(. ~ interviewee_name + 
    block + type_r, nyando_d12, sum)
aggdata2$land <- nyando.socio$land_owned_ha[match(aggdata2$interviewee_name, 
    nyando.socio$name)]
aggdata2$tree_d <- aggdata2$count/aggdata2$land

## Omit infinite values in the
## analyses
aggdata2.b <- aggdata2 %>% filter(tree_d != 
    Inf)

## Lower vs Middle ANOVA
aov.2 <- aov(tree_d ~ block, data = aggdata2.b)
summary(aov.2)

oneway.test(tree_d ~ block, data = aggdata2.b)  #ANOVA Unequal var


## Control vs Project ANOVA (Relaxing the homogeneity of variance
## assumption)
oneway.test(tree_d ~ type_r, data = subset(aggdata2.b, 
    block == "Middle"))
oneway.test(tree_d ~ type_r, data = subset(aggdata2.b, 
    block == "Lower"))

# Calculate summary tree densities###################################

## Tree density by region#################
aggdata2.b %>% group_by(block) %>% summarise(mean(tree_d))

## Tree density by treatment group# (Project vs Control)####
specify_decimal <- function(x, k) trimws(format(round(x, 
    k), nsmall = k))  #decimal places

aggdata2.b %>% group_by(type_r) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 
        2)) %>%
    add_row(type_r = "Overall", 
    tree_density = paste0(round(mean(aggdata2.b$tree_d),2), 
        "(", round(sd(aggdata2.b$tree_d),2),
        ")"))

## Tree density of between 20 cm and 30cm (20cm-30cm)#################
## 20-30cm
nyando_agro_d3 <- filter(nyando_agro, (dbh > 
    20 & dbh <= 30))
nyando_agro_d3$count <- 1
nyando_d13 <- nyando_agro_d3[, c("interviewee_name", 
    "block", "type_r", "area_hectare", 
    "count")]
aggdata3 <- aggregate(. ~ interviewee_name +
                          block + type_r, nyando_d13, sum)
aggdata3$land <- nyando.socio$land_owned_ha[match(aggdata3$interviewee_name, 
    nyando.socio$name)]
aggdata3$tree_d <- aggdata3$count/aggdata3$land

## Omit infinite values in the
## analyses
aggdata3.b <- aggdata3 %>% filter(tree_d != Inf)

## Lower vs Middle ANOVA
aov.3 <- aov(tree_d ~ block, data = aggdata3.b)
summary(aov.3)

oneway.test(tree_d ~ block, data = aggdata3.b)  #ANOVA Unequal var


## Control vs Project ANOVA (Relaxing the homogeneity of variance
## assumption)
oneway.test(tree_d ~ type_r, data = subset(aggdata3.b, 
    block == "Middle"))
oneway.test(tree_d ~ type_r, data = subset(aggdata3.b, 
    block == "Lower"))

# Calculate summary tree densities###################################

## Tree density by region#################
aggdata3.b %>% group_by(block) %>% summarise(mean(tree_d))

## Tree density by treatment group (Project vs Control)####
specify_decimal <- function(x, k) trimws(format(round(x, 
    k), nsmall = k))  #decimal places

aggdata3.b %>% group_by(type_r) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 
        2)) %>%
    add_row(type_r = "Overall", 
    tree_density = paste0(round(mean(aggdata3.b$tree_d),2), 
        "(", round(sd(aggdata3.b$tree_d),2), 
        ")"))

## Tree density of between 30 cm and 40cm (30cm-40cm)#################
## 30-40cm
nyando_agro_d4 <- filter(nyando_agro,
                         (dbh > 30 & dbh <= 40))
nyando_agro_d4$count <- 1
nyando_d14 <- nyando_agro_d4[, c("interviewee_name", 
    "block", "type_r", "area_hectare", 
    "count")]

aggdata4 <- aggregate(. ~ interviewee_name + 
    block + type_r, nyando_d14, sum)

aggdata4$land <- nyando.socio$land_owned_ha[match(aggdata4$interviewee_name, 
    nyando.socio$name)]
aggdata4$tree_d <- aggdata4$count/aggdata4$land

## Omit infinite values in the
## analyses
aggdata4.b <- aggdata4 %>% filter(tree_d != Inf)

## Lower vs Middle ANOVA
aov.4 <- aov(tree_d ~ block, data = aggdata4.b)
summary(aov.4)

oneway.test(tree_d ~ block, data = aggdata4.b)  #ANOVA Unequal var


## Control vs Project ANOVA (Relaxing
## the homogeneity of variance
## assumption)
oneway.test(tree_d ~ type_r, data = subset(aggdata4.b, 
    block == "Middle"))
oneway.test(tree_d ~ type_r, data = subset(aggdata4.b, 
    block == "Lower"))

# Calculate summary tree densities######################################

## Tree density by region#################
aggdata4.b %>% group_by(block) %>% summarise(mean(tree_d))

## Tree density by treatment group (Project vs Control)####
specify_decimal <- function(x, k) trimws(format(round(x, 
    k), nsmall = k))  #decimal places

aggdata4.b %>% group_by(type_r) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 
        2)) %>%
    add_row(type_r = "Overall", 
    tree_density = paste0(round(mean(aggdata4.b$tree_d),2), 
        "(", round(sd(aggdata4.b$tree_d),2), 
        ")"))

## Tree density of over 40cm DBH################
# Over 40cm
nyando_agro_d5 <- filter(nyando_agro, dbh > 40)
nyando_agro_d5$count <- 1

nyando_d15 <- nyando_agro_d5[, c("interviewee_name",
                                 "block", "type_r",
                                 "area_hectare",
                                 "count")]

aggdata5 <- aggregate(. ~ interviewee_name + 
    block + type_r, nyando_d15, sum)

aggdata5$land <- nyando.socio$land_owned_ha[match(aggdata5$interviewee_name, 
    nyando.socio$name)]
aggdata5$tree_d <- aggdata5$count/aggdata5$land

## Omit infinite values in the
## analyses
aggdata5.b <- aggdata5 %>% filter(tree_d != 
    Inf)

## Lower vs Middle ANOVA
aov.5 <- aov(tree_d ~ block, data = aggdata5.b)
summary(aov.5)

oneway.test(tree_d ~ block, data = aggdata5.b)  #ANOVA Unequal var


## Control vs Project ANOVA (Relaxing the homogeneity of variance
## assumption)
oneway.test(tree_d ~ type_r, data = subset(aggdata5.b, 
    block == "Middle"))
##oneway.test(tree_d ~ type_r, data = subset(aggdata5.b, 
  #  block == "Lower")) - Not enough observations

# Calculate summary tree densities################################

## Tree density by region#################
aggdata5.b %>% group_by(block) %>% summarise(mean(tree_d))

## Tree density by treatment group
## (Project vs Control)####
specify_decimal <- function(x, k) trimws(format(round(x, 
    k), nsmall = k))  #decimal places

aggdata5.b %>% group_by(type_r) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 
        2)) %>% add_row(type_r = "Overall", 
    tree_density = paste0(round(mean(aggdata5.b$tree_d), 2),
        "(", round(sd(aggdata5.b$tree_d),2), 
        ")"))


### Tree density of across whole# sample#################
nyando_agro_d6 <- nyando_agro
nyando_agro_d6$count <- 1
nyando_d16 <- nyando_agro_d6[, c("interviewee_name",
                                 "block", "type_r",
                                 "area_hectare",
                                 "count")]
aggdata6 <- aggregate(. ~ interviewee_name +
                          block + type_r, nyando_d16, sum)

aggdata6$land <- nyando.socio$land_owned_ha[match(aggdata6$interviewee_name, 
    nyando.socio$name)]

aggdata6$tree_d <- aggdata6$count/aggdata6$land

## Omit infinite values in the
## analyses
aggdata6.b <- aggdata6 %>% filter(tree_d != Inf)

## Lower vs Middle ANOVA
aov.6 <- aov(tree_d ~ block, data = aggdata6.b)
summary(aov.6)

oneway.test(tree_d ~ block, data = aggdata6.b)  #ANOVA Unequal var


## Control vs Project ANOVA (Relaxing the homogeneity of variance
## assumption)
oneway.test(tree_d ~ type_r, data = subset(aggdata6.b, 
    block == "Middle"))
oneway.test(tree_d ~ type_r, data = subset(aggdata6.b, 
    block == "Lower"))

# Calculate summary tree densities###################################
specify_decimal <- function(x, k) trimws(format(round(x, 
    k), nsmall = k))  #decimal places

## Tree density by region#################
aggdata6.b %>% group_by(block) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 2))

## Tree density by treatment group (Project vs Control)####

aggdata6.b %>% group_by(type_r) %>% 
    summarise(tree_density = specify_decimal(mean(tree_d), 
        2)) %>%
    add_row(type_r = "Overall", 
    tree_density = paste0(round(mean(aggdata6.b$tree_d),2), 
        "(", round(sd(aggdata6.b$tree_d),2), 
        ")"))



## Figure 1: The proportion number of trees and shrubs inventoried####

# Using figures from tree population counts
LP <- c("Lower Project", 1028, 218, 
    34, 3, 1)  #Lower project
LC <- c("Lower Control", 1289, 650, 
    90, 31, 9)  #Lower control
MP <- c("Middle Project", 2224, 1656, 
    410, 117, 47)  #Middle project
MC <- c("Middle Control", 741, 741, 
    275, 87, 48)  #Middle control

data.f <- data.frame(rbind(LP, LC, MP, 
    MC), row.names = NULL)  #Combine rows
# Add column names
colnames(data.f) <- c("site", "<10", 
    "10-20", "20-30", "30-40", ">40")

data.f2 <- data.f %>% gather(data.f, 
    site) %>% mutate(value = rep(c("Lower Project", 
    "Lower Control", "Middle Project", 
    "Middle Control"), 5))

colnames(data.f2) <- c("DBH", "N", "site")

## Add column for proportion
data.f2 <- data.f2 %>% mutate(total = ifelse(site == 
    "Lower Project", 1284, ifelse(site == 
    "Lower Control", 2069, ifelse(site == 
    "Middle Project", 4454, 1892))))

data.f2$N = as.numeric(data.f2$N)
data.f2 <- data.f2 %>% mutate(proportion = (N/total) * 
    100)

## CREATING FIGURE 1

### Write file into directory
write_csv(data.f2, "figure_1.csv")

figure.1 <- read_csv("figure_1.csv")

# Add region column
figure.1$region <- ifelse(figure.1$site == 
    "Lower Project" | figure.1$site == 
    "Lower Control", "Lower", "Middle")

## Order DBH and site levels
figure.1$DBH <- factor(figure.1$DBH, 
                       levels = c("<10", "10-20", "20-30", "30-40", 
                                  ">40"))
figure.1$site <- factor(figure.1$site, 
                        levels = c("Lower Project", "Lower Control", 
                                   "Middle Project", "Middle Control"))
## Test Plots
# Split to Lower and Middle
fig.1a = subset(figure.1, region == 
    "Lower")
fig.1b = subset(figure.1, region == 
    "Middle")



# Stacked barplot with multiple
# groups

ggplot(data = figure.1, aes(x = DBH, 
    y = N, fill = site)) + geom_bar(stat = "identity")

# Use position=position_dodge()#both
figure.1$region <- factor(figure.1$region, 
    labels = c("a). Lower", "b). Middle"))

p = ggplot(data = figure.1, aes(x = DBH, 
    y = proportion, fill = site)) + 
    geom_bar(stat = "identity", position = position_dodge()) + 
    facet_wrap(~region)

figure_1 <- p + labs(x = "Diameter at breast height (cm)", 
    y = "Proportions (%)") + theme_bw() + 
    theme(axis.title.x = element_text(size = 14, 
        face = "bold"), axis.title.y = element_text(size = 14, 
        face = "bold"), legend.position = "right", 
        axis.text = element_text(size = 12, 
            face = "bold")) + scale_fill_brewer(palette = "Spectral", 
    name = "Sample", labels = c("Lower Project-TLP", 
        "Lower Control-TLC", "Middle Project-TMP", 
        "Middle Control-TMC"))

figure_1



# Figure 2: Biomass estimates in TLP,TLC,TMP,TMC#####################
# CREATING FIGURE 2 
# Create data frame from figures in original Nyando paper
LP1 <- c("Lower Project", 35.5051, 42.1916, 
    24.1234, 3.1853, 8.3614)  ##LP
LC1 <- c("Lower Control", 24.2984, 131.4927, 
    30.25, 23.9544, 10.4904)  ##LC
MP1 <- c("Middle Project", 44.2665, 
    128.9358, 84.8772, 37.0872, 49.6706)  #MP
MC1 <- c("Middle Control", 177.0471, 
    162.8508, 74.3112, 49.3344, 44.0544)  #MC

db_b <- data.frame(rbind(LP1, LC1, MP1, 
    MC1), row.names = NULL)  #Combine rows
# Add column names
colnames(db_b) <- c("site", "<10", "10-20", 
    "20-30", "30-40", ">40")

db2_b <- db_b %>% gather(db_b, site) %>% 
    mutate(value = rep(c("Lower Project", 
        "Lower Control", "Middle Project", 
        "Middle Control"), 5))
# change levels ordering
db2_b$DBH <- factor(db2_b$DBH, levels = c("<10", "10-20", 
                                      "20-30", "30-40", ">40"))

colnames(db2_b) <- c("DBH", "N", "site")

## Add column for proportion
db2_b <- db2_b %>% mutate(total_site = ifelse(site == 
    "Lower Project", 113.3668, ifelse(site == 
    "Lower Control", 220.4859, ifelse(site == 
    "Middle Project", 344.8373, 507.5979)))) %>% 
  mutate(total_region = if_else(site == "Lower Project"|site == "Lower Control",
                                333.8527, 842.4352)) %>% 
  mutate(region = if_else(site == "Lower Project"|site == "Lower Control",
                                "Lower", "Middle"))

db2_b$N = as.numeric(db2_b$N)

## Find proportion for whole dataset
whole_set <- db2_b %>% group_by(DBH) %>% summarize(summar = sum(N)) %>% 
  mutate(prop_whole = 100*summar/sum(summar))
whole_set

## Find proportion for regions
region_set_lower <- db2_b %>% filter(region == "Lower") %>% 
  group_by(DBH) %>% summarize(summar = sum(N)) %>% 
  mutate(prop_lower = 100*summar/sum(summar))
region_set_lower

region_set_middle <- db2_b %>% filter(region != "Lower") %>% 
  group_by(DBH) %>% summarize(summar = sum(N)) %>% 
  mutate(prop_middle = 100*summar/sum(summar))
region_set_middle

## Find proportions for sites
db2_b <- db2_b %>% mutate(proportion_site = (N/total_site) * 
    100)

### Write file into directory
write_csv(db2_b, "figure_2.csv")

fig2 <- read_csv("figure_2.csv")


## Order DBH and site levels
fig2$DBH <- factor(fig2$DBH, levels = c("<10", 
                                        "10-20", "20-30", "30-40", ">40"))
fig2$site <- factor(fig2$site, levels = c("Lower Project", 
                                          "Lower Control", "Middle Project", 
                                          "Middle Control"))
## Plots
# Split to Lower and Middle
fig2a = subset(fig2, region == "Lower")
fig2b = subset(fig2, region == "Middle")



# Use position=position_dodge()#both
fig2$region <- factor(fig2$region, labels = c("a). Lower", 
    "b). Middle"))
figure_2 = ggplot(data = fig2, aes(x = DBH, 
    y = proportion_site, fill = site)) + 
    geom_bar(stat = "identity", position = position_dodge()) + 
    facet_wrap(~region)
figure_2 <- figure_2 + labs(x = "Diameter at breast height (cm)", 
    y = "Proportions of Above Ground Biomass (%)") + 
    theme_bw() + theme(axis.title.x = element_text(size = 9, 
    face = "bold"), axis.title.y = element_text(size = 9, 
    face = "bold"), legend.position = "right", 
    axis.text = element_text(size = 12, 
        face = "bold")) + scale_fill_brewer(palette = "Spectral", 
    name = "Sample", labels = c("Lower Project-TLP", 
        "Lower Control-TLC", "Middle Project-TMP", 
        "Middle Control-TMC"))

figure_2


#Table 7 Estimated AGB per agroforestry practice####
#Tree distribution by practice
ctable(nyando_agro[,c(4,15)])
    #4 is type in region and 15 is agro practice

    # repeat for block in Nyando region
ctable(nyando_agro[,c(2,15)]) #2 is block

#Calculate number of households per type in region with
#Boundary Planting
nyando_agro %>% group_by(type_r) %>%
    filter(agroforestry_practice == "Boundary planting") %>%
    summarise(hh = length(unique(interviewee_name)),
              dbh = paste0(round(mean(dbh),2),
                          "(",
                          round(sd(dbh),2),
                          ")"))

#Hedgerows
nyando_agro %>% group_by(type_r) %>%
    filter(agroforestry_practice == "Hedgerows") %>%
    summarise(hh = length(unique(interviewee_name)),
              dbh = paste0(round(mean(dbh),2),
                           "(",
                           round(sd(dbh),2),
                           ")"))

#MPTs
nyando_agro %>% group_by(type_r) %>%
    filter(agroforestry_practice == "MPTs") %>%
    summarise(hh = length(unique(interviewee_name)),
              dbh = paste0(round(mean(dbh),2),
                           "(",
                           round(sd(dbh),2),
                           ")"))

#Riparian buffers
nyando_agro %>% group_by(type_r) %>%
    filter(agroforestry_practice == "Riparian buffers") %>%
    summarise(hh = length(unique(interviewee_name)),
              dbh = paste0(round(mean(dbh),2),
                           "(",
                           round(sd(dbh),2),
                           ")"))

#Woodlot
nyando_agro %>% group_by(type_r) %>%
    filter(agroforestry_practice == "woodlot") %>%
    summarise(hh = length(unique(interviewee_name)),
              dbh = paste0(round(mean(dbh),2),
                           "(",
                           round(sd(dbh),2),
                           ")"))




# AGB Calculations############################################
# AGB Calculations Combined sample of all DBH
nyando.socio$interviewee_name = nyando.socio$name #add column
nyando_merged <- merge(x = nyando_agro, y = nyando.socio,
                       by = "interviewee_name", all = TRUE)

# create column for AGB per hectare representing each tree AGB
nyando_merged$agb.hectare = nyando_merged$agb_mg/
                            nyando_merged$ha

# Overall Total AGB
sum(nyando_merged$agb.hectare)
# per farm
sum(nyando_merged$agb.hectare)/
    length(unique(nyando_merged$interviewee_name))

# Total AGB by Block
nyando_merged %>% group_by(block.x)%>%
    summarise(total_agb = specify_decimal(sum(agb.hectare),2))

# Total AGB by Group in block
nyando_merged %>% group_by(type_r)%>%
    summarise(total_agb = specify_decimal(sum(agb.hectare),2))

# Mean AGB by Block
nyando_merged %>% group_by(block.x)%>%
    summarise(mean_agb = 
                  specify_decimal(sum(agb.hectare)/
                                      length(unique(interviewee_name)),2))

# Mean AGB by Group in block
nyando_merged %>% group_by(type_r)%>%
    summarise(mean_agb = 
                  specify_decimal(sum(agb.hectare)/
                                      length(unique(interviewee_name)),2))

## Calculating One way ANOVA Welch's
## anova for unequal variances for the AGB values By
## region
oneway.test(agb.hectare ~ block.x, data = nyando_merged, 
            var.equal = FALSE)  #agb by block

##By group in Lower
oneway.test(agb.hectare ~ type_r, data = nyando_merged%>%
                filter(block.x == "Lower"), 
            var.equal = FALSE)  #agb by group

##By group in Middle
oneway.test(agb.hectare ~ type_r, data = nyando_merged%>%
                filter(block.x == "Middle"), 
            var.equal = FALSE)  #agb by group




#Calculate total AGB of households per practice with
#Boundary Planting
nyando_boundary <- nyando_merged %>% 
    filter(agroforestry_practice == "Boundary planting") %>%
    group_by(interviewee_name)%>%
    summarise(type_region = first(type_r) , 
              agb = sum(agb_mg), 
              boundary.length = sum(length_metres, na.rm =TRUE))%>%
    mutate(boundary.length = 
               replace(boundary.length,
                       boundary.length==0,91.8))#replace with median

nyando_boundary %>% group_by(type_region)%>%
    summarise(agb.farm = sum(agb)/
                  sum(boundary.length*0.0002)/
                  length(unique(interviewee_name)))
    #convert length in metres to hectares Kuyah et.al
    


#Hedgerows
nyando_merged %>% group_by(type_r) %>% filter(agroforestry_practice == 
                                            "Hedgerows") %>%
    summarise(agb.hecta.hh = sum(agb.hectare)/
                  length(unique(interviewee_name)))

#MPTs
nyando_merged %>% group_by(type_r) %>% filter(agroforestry_practice == 
                                            "MPTs") %>%
    summarise(agb.hecta.hh = sum(agb.hectare)/
                  length(unique(interviewee_name)))

#Riparian buffers
nyando_merged %>% group_by(type_r) %>% filter(agroforestry_practice == 
                                            "Riparian buffers") %>%
    summarise(agb.hecta.hh = specify_decimal((sum(agb.hectare)/
                    length(unique(interviewee_name))),2))

#Woodlot
nyando_merged %>% group_by(type_r) %>% filter(agroforestry_practice == 
                                            "woodlot") %>%
    summarise(agb.hecta.hh = specify_decimal((sum(agb.hectare)/
                    length(unique(interviewee_name))),2))




#Overall AGB per practice---Overall
#Boundary Planting
#Values calculated from figures obtained from previous calculation
((3.45*17)+(4.34*28)+(7.28*23)+(4.34*24))/((17+28+23+24))
#4.91

#Hedgerows
((3.76*4)+(5.87*9))/(4+9)
#5.22

#MPTs
((1.04*20)+(1.05*25)+(1.85*27)+(2.92*26))/(20+25+27+26)
#1.76

#Riparian buffers
((5.19*1)+(223.87*1))/(1+1)
#114.53

#Woodlots
((2.31*6)+(0.52*1)+(5.31*14)+(33.75*3))/(6+1+14+3)
#7.92

#Overall AGB per practice---Per site
#Boundary Planting
#Values calculated from figures obtained from previous calculation
#Lower
((3.45*17)+(4.34*28))/((17+28))
#4.00
#Middle
((7.28*23)+(4.34*24))/((23+24))
#5.78

#Hedgerows
#Lower
((3.76*4))/(4)
#3.76
#Middle
((5.87*9))/(9)
#5.87

#MPTs
#Lower
((1.04*20)+(1.05*25))/(20+25)
#1.05
#Middle
((1.85*27)+(2.92*26))/(27+26)
#2.37

#Riparian buffers
#Lower
((5.19*1))/(1)
#5.19
#Middle
((223.87*1))/(1)
#223.87

#Woodlots
#Lower
((2.31*6)+(0.52*1))/(6+1)
#2.05
#Middle
((5.31*14)+(33.75*3))/(14+3)
#10.33



##Dominant biomass contributor species####
#Overall
nyando_merged %>% group_by(species)%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = 100*total.agb/sum(total.agb))%>%
    arrange(desc(total.agb))

#Lower.................
nyando_merged %>% group_by(species)%>%
    filter(block.x == "Lower")%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))

#Middle.................
nyando_merged %>% group_by(species)%>%
    filter(block.x == "Middle")%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))

#Lower Project
nyando_merged %>% group_by(species)%>%
    filter(type_r == "Lower Project")%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))

#Lower Control
nyando_merged %>% group_by(species)%>%
    filter(type_r == "Lower Control")%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))

#Middle Project
nyando_merged %>% group_by(species)%>%
    filter(type_r == "Middle Project")%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))

#Midddle Control
nyando_merged %>% group_by(species)%>%
    filter(type_r == "Middle Control")%>%
    summarise(total.agb = sum(agb_mg))%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))


######################################################################## 

## CREATING FIGURE 3 Figure 3 Create
## dataset for figure 3 Lower Nyando

#Lower
rev_nyando_fig3a <-
nyando_merged %>% 
    group_by(origin,species)%>%
    filter(block.x == "Lower")%>%
    summarise(total.agb = sum(agb_mg))%>%
    ungroup(origin)%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))%>% slice(1:10)%>%
    ggplot(aes(x = reorder(species,-prop.agb),
               y = prop.agb, 
               fill = origin)) + 
    geom_bar(stat = "identity") + coord_flip() + 
    scale_fill_manual(values = c("#e34a33","#fdbb84"),
                      labels = c(c("Exotic",
                                   "Indigenous")))+
    labs(x = "Tree and Shrub Species", 
         y = "Proportions (%)", title = "a). Lower") + 
    theme_bw() +
    theme(axis.title.x = element_text(size = 14, 
                                      face = "bold"), 
          axis.title.y = element_text(size = 14, 
                                      face = "bold"), 
          legend.position = "right", 
          axis.text.x = element_text(size = 12))

#Middle
rev_nyando_fig3b <-
nyando_merged %>% 
    group_by(origin, species)%>%
    filter(block.x == "Middle")%>%
    summarise(total.agb = sum(agb_mg))%>%
    ungroup(origin)%>%
    mutate(prop.agb = (total.agb/sum(total.agb)*100))%>%
    arrange(desc(total.agb))%>% slice(1:10)%>%
    ggplot(aes(x = reorder(species,-prop.agb),
               y = prop.agb, 
               fill = origin)) + 
    geom_bar(stat = "identity") + coord_flip() + 
    scale_fill_manual(values = c("#a8ddb5","#43a2ca"),
                      labels = c(c("Exotic",
                                   "Indigenous")))+
    labs(x = "Tree and Shrub Species", 
         y = "Proportions (%)", title = "b). Middle") + 
    theme_bw() +
    theme(axis.title.x = element_text(size = 14, 
                                      face = "bold"), 
          axis.title.y = element_text(size = 14, 
                                      face = "bold"), 
          legend.position = "right", 
          axis.text.x = element_text(size = 12))

#Combine the two figures
nyando_figure3 <- ggarrange(rev_nyando_fig3a, rev_nyando_fig3b +
                                font("x.text", size = 10),
                    ncol = 1, nrow = 2)
annotate_figure(nyando_figure3,
                bottom = text_grob("Data source: \n Authors' data set",
                                   color = "black",
                                   hjust = 1, x = 1, 
                                   face = "italic", size = 10),
                fig.lab = "Figure 3", fig.lab.face = "bold")


